# -*- coding: utf-8 -*-
import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule





class EmptyFileSpider(CrawlSpider):
    """Skeleton crawl spider that starts at the Baidu home page.

    The spider is restricted to ``www.baidu.com`` and declares a single
    crawl rule: links whose URL matches the ``Items/`` pattern are handed
    to :meth:`parse_item`, and ``follow=True`` is set on that rule.
    """

    name = 'empty_file'
    allowed_domains = ['www.baidu.com']
    start_urls = ['http://www.baidu.com/']

    # One-element tuple; the trailing comma is required to keep it a tuple.
    rules = (
        Rule(LinkExtractor(allow=r'Items/'), follow=True, callback='parse_item'),
    )

    def parse_item(self, response):
        """Build the item for a response matched by the ``Items/`` rule.

        Args:
            response: The response object passed in by the crawl rule.
                It is currently not read.

        Returns:
            dict: A new, empty dictionary. No fields are populated yet.
        """
        # Translated author note: the comments in the generated template
        # were considered too crude, so a plain data structure (a dict)
        # is used for the item instead.
        #
        # Disabled example extractions kept from the template:
        # item['domain_id'] = response.xpath('//input[@id="sid"]/@value').extract()
        # item['name'] = response.xpath('//div[@id="name"]').extract()
        # item['description'] = response.xpath('//div[@id="description"]').extract()
        return {}
